######################################
###                                ###
###   Women and Party Building     ###
###                                ###
### code_wrangling_Supplementary.R ###
###                                ###
######################################

# This script builds the non-RDD datasets used in main text and appendix

rm( list=ls() )
library(stringr)
library(lubridate)
library(dplyr)


cat("Building additional analysis datasets...\n")

# Tries to set the working directory to the folder where this script is saved,
# using the rstudioapi package. This can only succeed inside RStudio with a saved
# script; in any other case (e.g., Rscript, or rstudioapi not installed) the
# working directory is left unchanged and a message is printed instead of an error.
script.dir <- tryCatch(
  dirname(rstudioapi::getActiveDocumentContext()$path),
  error = function(e) NA_character_
)
if (length(script.dir) == 1L && !is.na(script.dir) && nzchar(script.dir) && dir.exists(script.dir)) {
  setwd(script.dir)
} else {
  message("Could not detect the folder of this script; keeping the current working directory: ", getwd())
}

# Alternatively, you can use setwd( PATH ) to set this directory manually




### Creates data on gender balance of party membership recruitment for major Brazilian parties
### (Used in FIGURE 1)

member.dir <- "../1_data/cleaned/membership_data/"
# Parties in the order in which their columns appear in the saved matrix
parties.sorted <- c( "PCDOB", "PT", "PDT",  "MDB", "PSD", "PSDB",  "PP",  "DEM",  "PSL")

# Keeps only the parties whose cleaned membership file is present, already in display order,
# so that column labels can never get out of sync with the data
expected.files <- paste0("data_cleaned_filiados_", parties.sorted, ".rds")
found <- expected.files %in% list.files(member.dir)
if (!all(found)) {
  warning("No cleaned membership file found for: ", paste(parties.sorted[!found], collapse=", "), call.=FALSE)
}
parties <- parties.sorted[found]
member.files <- expected.files[found]

# Reads one cleaned individual-level membership file and returns c(women, men):
# the number of recruits of each gender with Term >= 2004.
# Recruits whose Gender is missing are counted in neither group (NA %in% "F" is FALSE,
# so simply negating the %in% test would silently count them as men).
count.recruits <- function(file) {
  data <- readRDS(paste0(member.dir, file))
  # which() drops rows whose Term is NA instead of turning them into all-NA rows
  gender <- data$Gender[which(data$Term >= 2004)]
  is.woman <- gender %in% "F"
  c(sum(is.woman), sum(!is.na(gender) & !is.woman))
}
counts <- vapply(member.files, count.recruits, numeric(2), USE.NAMES=FALSE)
women <- counts[1, ]
men <- counts[2, ]

# Row 1 = share of women, row 2 = share of men; one column per party
gender.members <- rbind(women/(women+men), men/(women+men))
colnames(gender.members) <- parties
# Appends two empty (NA) columns to the matrix
gender.members <- cbind(gender.members, c(NA, NA), c(NA, NA))

saveRDS(gender.members, "../1_data/analysis/data_analysis_MemberBalance.rds")
rm( list=ls() )







### Creates data on elite and mass party-switching by gender
### (Used in TABLE 1)

# Returns, for candidates to `office` (a value of DESCRICAO_CARGO), the share who switched
# parties after their first run for that office, as a named vector with one element per
# value of Gender (in sorted order of the Gender values).
#   candidates:        candidate-level data (ANO_ELEICAO, NUM_TITULO_ELEITORAL_CANDIDATO, Party, DESCRICAO_CARGO)
#   switchers:         membership data with NUM_TITULO_ELEITORAL_CANDIDATO, Year, NewParty, Gender
#   later.candidacies: all candidacies, with columns Later.Year, NUM_TITULO_ELEITORAL_CANDIDATO, Later.Party
switch_rate_by_gender <- function(office, candidates, switchers, later.candidacies) {
  id.var <- "NUM_TITULO_ELEITORAL_CANDIDATO"

  # Earliest election year in which each candidate ran for this office under each party
  first.run <- aggregate(ANO_ELEICAO ~ NUM_TITULO_ELEITORAL_CANDIDATO + Party,
                         data=candidates[candidates$DESCRICAO_CARGO == office,], FUN="min")
  first.run <- merge(first.run, switchers[, c(id.var, "Year", "NewParty", "Gender")], by=id.var, all.x=TRUE)

  # In order to qualify as an "elite party switch," the candidate must have changed parties
  # *after* their first run for office at that level of government
  first.run$Switched <- first.run$Year > first.run$ANO_ELEICAO & first.run$Party != first.run$NewParty
  # A missing value means that the candidate does not appear in the party-switchers dataset,
  # and therefore, that they never switched parties
  first.run$Switched[is.na(first.run$Switched)] <- FALSE

  # Now compare to the full candidates' dataset (the reason for this extra step is that some
  # (defunct) parties are missing from the party membership dataset)
  first.run <- merge(first.run, later.candidacies, by=id.var)
  first.run$Switched <- first.run$Switched |
    (first.run$Later.Year > first.run$ANO_ELEICAO & first.run$Later.Party != first.run$Party)

  # One row per candidate: 1 if any of their rows shows a switch, 0 otherwise.
  # The formula interface drops rows with missing Gender, and Gender comes from `switchers`.
  first.run <- aggregate(Switched ~ NUM_TITULO_ELEITORAL_CANDIDATO + Gender, data=first.run, FUN="max")

  # Share of switchers within each gender. Taking the mean of the 0/1 indicator gives the same
  # number as the second column of a row-normalized table, but also works when everybody
  # (or nobody) switched and the table would only have one column.
  vapply(split(first.run$Switched, first.run$Gender), mean, numeric(1))
}

# Loads data on members who switched parties (created in code_wrangling_MembersGender.R)
switchers <- readRDS("../1_data/cleaned/data_cleaned_Switchers.rds")
switchers$NUM_TITULO_ELEITORAL_CANDIDATO <- str_pad(switchers$ID, width=12, side="left", pad="0")
switchers$NewParty <- switchers$Party

# Loads TSE's data on candidates
candidates <- readRDS("../1_data/cleaned/data_cleaned_Candidates.rds")
candidates <- candidates[candidates$ANO_ELEICAO > 2000 & candidates$NUM_TITULO_ELEITORAL_CANDIDATO > 0,]

# Every candidacy of every candidate, used to detect party changes between candidacies
later.candidacies <- candidates[, c("ANO_ELEICAO", "NUM_TITULO_ELEITORAL_CANDIDATO", "Party")]
names(later.candidacies) <- c("Later.Year", "NUM_TITULO_ELEITORAL_CANDIDATO", "Later.Party")

# Elite party-switching rates by gender, one office at a time
results.council <- switch_rate_by_gender("VEREADOR", candidates, switchers, later.candidacies)
results.mayor <- switch_rate_by_gender("PREFEITO", candidates, switchers, later.candidacies)
results.statedeputy <- switch_rate_by_gender("DEPUTADO ESTADUAL", candidates, switchers, later.candidacies)
results.federaldeputy <- switch_rate_by_gender("DEPUTADO FEDERAL", candidates, switchers, later.candidacies)

# Gets mass party membership data and calculates party switching rate by gender
ordinary <- readRDS("../1_data/cleaned/data_cleaned_membership_PartyMunicipalityTerm.rds")
results.ordinary <- c(sum(ordinary$Switched.Female, na.rm=TRUE) / sum(ordinary$Female.Recruits, na.rm=TRUE), sum(ordinary$Switched.Male, na.rm=TRUE) / sum(ordinary$Male.Recruits, na.rm=TRUE))

# Combines data together
# NOTE(review): the "Women"/"Men" labels below assume that Gender has exactly two values and
# that the women's code sorts before the men's code -- confirm against the Switchers data.
results <- round(rbind(results.ordinary, results.council, results.mayor, results.statedeputy, results.federaldeputy), 3)
rownames(results) <- c("Ordinary Members", "City Council Candidates", "Mayoral Candidates", "State Deputies Candidates", "Federal Deputies Candidates")
colnames(results) <- c("Women", "Men")

saveRDS(results, "../1_data/analysis/data_analysis_PartySwitchingByGender.rds")
rm( list=ls() )






### Creates data for two-way fixed effects analysis
### (Used in FIGURE J.1 and TABLE J.1)

# Party-municipality-term panel; each row is assigned the election year PERIODO + 4
panel <- readRDS("../1_data/cleaned/data_cleaned_membership_PartyMunicipalityTerm.rds")
panel$ANO_ELEICAO <- panel$PERIODO + 4
panel <- panel[panel$ANO_ELEICAO > 2000,]
panel$Year <- panel$ANO_ELEICAO
panel$Party.Mun <- paste(panel$Party, panel$MunCode)

# Elected mayors, keyed by party-municipality and by party-municipality-year
all.candidates <- readRDS("../1_data/cleaned/data_cleaned_Candidates.rds")
is.elected.mayor <- all.candidates$DESCRICAO_CARGO == "PREFEITO" & all.candidates$DESC_SIT_TOT_TURNO == "ELEITO"
elected <- all.candidates[is.elected.mayor,]
elected$Party.Mun <- paste(elected$SIGLA_PARTIDO, elected$MunCode)
elected$Party.Mun.Year <- paste(elected$Party.Mun, elected$ANO_ELEICAO)
keys.women <- elected$Party.Mun.Year[elected$DESCRICAO_SEXO == "FEMININO"]
keys.men <- elected$Party.Mun.Year[elected$DESCRICAO_SEXO == "MASCULINO"]

# Code treatment
# Keeps party-municipality pairs that appear at least once among the elected mayors, then flags
# the rows whose party-municipality-year matches a woman (man) elected mayor
panel <- panel[panel$Party.Mun %in% elected$Party.Mun,]
panel.key <- paste(panel$Party.Mun, panel$ANO_ELEICAO)
panel$Treated.Women <- panel.key %in% keys.women
panel$Treated.Men <- panel.key %in% keys.men
# Opposition = 1 when the row matches neither a woman nor a man elected mayor
panel$Opposition <- as.numeric(!(panel$Treated.Women | panel$Treated.Men))
panel$Gender.Gap <- panel$Male.Recruit.Share - panel$Female.Recruit.Share

keep.vars <- c("MunCode", "Year", "Treated.Women", "Treated.Men", "Opposition", "Gender.Gap", "Recruit.Share", "Female.Recruit.Share", "Male.Recruit.Share")
panel <- panel[, keep.vars]

saveRDS(panel, "../1_data/analysis/data_analysis_MunicipalitiesPanel.rds")
rm( list=ls() )






### Creates data for campaign donations and career-path analyses
### (Used in Appendix TABLE M.1, TABLE N.1, and TABLE N.2)

# Adds two columns to `federal` (one row per federal candidacy):
#   year.col: the first election year in which the candidate appears in `winners`
#   held.col: TRUE if that first win happened strictly before the federal candidacy
#             (FALSE when the candidate never appears in `winners`)
# `winners` must contain CPF_CANDIDATO and ANO_ELEICAO. The result has exactly one lookup row
# per candidate, so the left merge cannot duplicate rows of `federal`.
add_prior_office <- function(federal, winners, year.col, held.col) {
  first.win <- aggregate(ANO_ELEICAO ~ CPF_CANDIDATO, data=winners, FUN="min")
  names(first.win)[2] <- year.col
  federal <- merge(federal, first.win, by="CPF_CANDIDATO", all.x=TRUE)
  held <- federal$ANO_ELEICAO > federal[[year.col]]
  held[is.na(held)] <- FALSE
  federal[[held.col]] <- held
  federal
}

candidates <- readRDS("../1_data/cleaned/data_cleaned_Candidates.rds")
vars <- c("ANO_ELEICAO", "CPF_CANDIDATO", "SIGLA_PARTIDO",  "DESC_SIT_TOT_TURNO", "DESCRICAO_CARGO", "Winner", "Woman")
Federal <- unique(candidates[candidates$DESCRICAO_CARGO == "DEPUTADO FEDERAL", vars])
State <- unique(candidates[candidates$DESCRICAO_CARGO %in% c("DEPUTADO ESTADUAL", "DEPUTADO DISTRITAL"), vars])

# Finds federal deputies who previously held state office
# (the first state win is used directly; re-merging it onto the winners' rows adds no
# information and can duplicate candidates who have more than one winner row in a year)
Federal <- add_prior_office(Federal, State[State$Winner,], "State.Win.Year", "Held.State")

# Adds data on local government officials
Mayors <- candidates[candidates$DESCRICAO_CARGO == "PREFEITO" & candidates$Winner,]
Councilors <- candidates[candidates$DESCRICAO_CARGO == "VEREADOR" & candidates$Winner,]
ViceMayors <- candidates[candidates$DESCRICAO_CARGO == "VICE-PREFEITO" & candidates$Winner,]
Federal <- add_prior_office(Federal, Mayors, "Mayor.Win.Year", "Held.Mayor")
Federal <- add_prior_office(Federal, Councilors, "Council.Win.Year", "Held.Council")
Federal <- add_prior_office(Federal, ViceMayors, "Vice.Win.Year", "Held.Vice")
Federal$Held.Local <- Federal$Held.Mayor | Federal$Held.Council | Federal$Held.Vice
Federal$Held.Lower <- Federal$Held.Local | Federal$Held.State

# Gets the first time that the federal candidate ran for that office
First.Run <- aggregate(ANO_ELEICAO ~ CPF_CANDIDATO, FUN="min", data=Federal)
First.Run <- merge(First.Run, Federal, by=c("CPF_CANDIDATO", "ANO_ELEICAO"))

# Gets federal/state campaign donations data
donations <- readRDS("../1_data/raw/data_raw_CampaignDonationsFederalState.rds")
donations$CPF_CANDIDATO <- as.numeric(donations$CPF_CANDIDATO)
# Drops the candidates' own resources
# NOTE(review): only this exact spelling is dropped, while the party/candidate categories below
# are matched in several spellings -- confirm that no other "own resources" label exists in the data.
donations <- donations[donations$TIPO_RECEITA != "Recursos próprios",]
donations$Party.Funds <- ifelse(donations$TIPO_RECEITA %in% c("RECURSOS DE PARTIDO POLÍTICO", "Recursos de partido político"), donations$VALOR_RECEITA, 0)
donations$Candidate.Transfers <- ifelse(donations$TIPO_RECEITA %in% c("RECURSOS DE OUTROS CANDIDATOS/COMITÊS", "Recursos de outros candidatos/comitês", "Recursos de outros candidatos"), donations$VALOR_RECEITA, 0)
donations$All.Donations <- donations$VALOR_RECEITA
# Totals per candidate, election year and office
donations <- aggregate(cbind(All.Donations, Party.Funds, Candidate.Transfers) ~ CPF_CANDIDATO + ANO_ELEICAO + DESCRICAO_CARGO, FUN="sum", data=donations)

# Merges donations data into the federal candidate data
# NOTE(review): the office label here is title case ("Deputado Federal"), unlike the candidates
# data -- confirm it is spelled this way in every year of the donations data.
federal.donations <- donations[donations$DESCRICAO_CARGO == "Deputado Federal", names(donations) != "DESCRICAO_CARGO"]
First.Run <- merge(First.Run, federal.donations, by=c("CPF_CANDIDATO", "ANO_ELEICAO"), all.x=TRUE)

saveRDS(First.Run, "../1_data/analysis/data_analysis_CareerPathFederal.rds")



### Codes data on mid-term career shifts 
### (Used in appendix TABLE O.1)

candidates <- readRDS("../1_data/cleaned/data_cleaned_Candidates.rds")
vars <-  c("CPF_CANDIDATO", "DESC_SIT_TOT_TURNO", "DESCRICAO_CARGO", "DESCRICAO_SEXO", "ANO_ELEICAO", "Winner")
Federal.Candidates <- unique(candidates[candidates$DESCRICAO_CARGO == "DEPUTADO FEDERAL", vars])
Mayor.Candidates <- unique(candidates[candidates$DESCRICAO_CARGO == "PREFEITO", vars])
Federal.Deputies <- Federal.Candidates[Federal.Candidates$Winner,]
Federal.Deputies$Year <- Federal.Deputies$ANO_ELEICAO

# Checks whether the deputy ran for mayor before their term in congress was complete
# (a mayoral candidacy two years after the federal election in which the deputy won).
# The lookup is reduced to one row per candidate-year so the merge cannot duplicate deputies.
Mayoral.Runs <- Mayor.Candidates[, c("CPF_CANDIDATO", "ANO_ELEICAO")]
Mayoral.Runs$Year <- Mayoral.Runs$ANO_ELEICAO - 2
Mayoral.Runs$Mayoral.Run <- TRUE
Mayoral.Runs <- unique(Mayoral.Runs[, c("CPF_CANDIDATO", "Year", "Mayoral.Run")])
Federal.Deputies <- merge(Federal.Deputies, Mayoral.Runs, by=c("CPF_CANDIDATO", "Year"), all.x=TRUE)
Federal.Deputies$Mayoral.Run[is.na(Federal.Deputies$Mayoral.Run)] <- FALSE

# Check whether the deputy was renominated in the next election
# (a federal deputy candidacy four years later). This uses the federal candidacies loaded in
# this section, so the section does not depend on objects left over from earlier sections.
Next.Election <- Federal.Candidates[, c("CPF_CANDIDATO", "ANO_ELEICAO")]
Next.Election$Year <- Next.Election$ANO_ELEICAO - 4
Next.Election$Renominated <- TRUE
Next.Election <- unique(Next.Election[, c("CPF_CANDIDATO", "Year", "Renominated")])
Federal.Deputies <- merge(Federal.Deputies, Next.Election, by=c("CPF_CANDIDATO", "Year"), all.x=TRUE)
Federal.Deputies$Renominated[is.na(Federal.Deputies$Renominated)] <- FALSE

Federal.Deputies <- Federal.Deputies[, c("CPF_CANDIDATO", "Year",  "DESCRICAO_SEXO", "Mayoral.Run", "Renominated")]

saveRDS(Federal.Deputies, "../1_data/analysis/data_analysis_CareerPathRenominationShifts.rds")
rm( list=ls() )





